library(here)
library(tidyverse)
theme_set(theme_light())

library(scuttle)
library(iCOBRA)

library(kableExtra)
library(DDCompanion)

Setup

## Directory setup
## All paths of this benchmark are expressed relative to the project root
here_root <- "benchmarks/lupus-n_patients"
here::i_am(
    path = file.path(here_root, "analysis/lupus-n_patients-sim-results.Rmd")
)
#> here() starts at /Users/jg/Desktop/PhD/DD_project/DD_git

## Result and figure directories of this benchmark
res_dir <- here::here(here_root, "results")
fig_dir <- here::here(here_root, "figures")
  • Using n_patients: 10 20 30
  • Using methods: edgeR_QP, edgeR_NB, qbGLM_offset_squeeze, bGLM, qbGLM, qbGLM_offset
  • Using celltype: T4_naive
  • Using prop_DE: 0.05

Load results

# TODO: also add all-patients results for comparison?


## Look up the result files once per cohort size. The outer list is named
## "n_patients_<n>" so that later steps can recover the cohort size from it.
res_files <- set_names(
    map(n_patients, function(n) {
        get_sim_res_files(
            dataset = "lupus-n_patients",
            methods = methods,
            prop_DE = prop_DE,
            celltype = celltype,
            datatype = "pb",
            n_patients = n
        )
    }),
    paste0("n_patients_", n_patients)
)

## Read every file at the second nesting level and print the resulting
## structure (cohort size -> method -> replicate, see the output below)
res_list <- map_depth(res_files, 2, readRDS)
str(res_list, max.level = 3)
#> List of 3
#>  $ n_patients_10:List of 6
#>   ..$ edgeR_QP            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_NB            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset_squeeze:List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ bGLM                :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM               :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset        :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>  $ n_patients_20:List of 6
#>   ..$ edgeR_QP            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_NB            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset_squeeze:List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ bGLM                :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM               :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset        :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>  $ n_patients_30:List of 6
#>   ..$ edgeR_QP            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ edgeR_NB            :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset_squeeze:List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ bGLM                :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM               :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2
#>   ..$ qbGLM_offset        :List of 5
#>   .. ..$ replicate_1:List of 2
#>   .. ..$ replicate_2:List of 2
#>   .. ..$ replicate_3:List of 2
#>   .. ..$ replicate_4:List of 2
#>   .. ..$ replicate_5:List of 2

Load SCE objects

## Locate the "sim_replicates" SCE file(s) for each cohort size; the list is
## named "n_patients_<n>", matching the names used for the results
data_files <- set_names(
    map(n_patients, function(n) {
        get_SCE_files(
            dataset = "lupus-n_patients", which = "sim_replicates",
            celltype = celltype, n_patients = n, prop_DE = prop_DE
        )
    }),
    paste0("n_patients_", n_patients)
)
## Read the objects and print their structure
sce_objects <- data_files %>% map(readRDS)
str(sce_objects, max.level = 3)
#> List of 3
#>  $ n_patients_10:List of 5
#>   ..$ replicate_1:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_2:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_3:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_4:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_5:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>  $ n_patients_20:List of 5
#>   ..$ replicate_1:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_2:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_3:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_4:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_5:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>  $ n_patients_30:List of 5
#>   ..$ replicate_1:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_2:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_3:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_4:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots
#>   ..$ replicate_5:Formal class 'SingleCellExperiment' [package "SingleCellExperiment"] with 9 slots

Data overview

  • The results were generated from 5 mock replicates
  • Each replicate was generated by randomly splitting the subjects in two mock groups
  • No sub-sampling of cells per patient was performed for this data
  • DE was introduced by randomly swapping genes in one of the mock groups
  • The number of patients was randomly sub-sampled stratified by mock group

Note that for n_patients = 10 and 20, the selected patients vary randomly between replicates, which is why the nrows and ncols are not identical across replicates. For n_patients = 30, however, no random sub-sampling was done, because I could simply use all patients from 2 of the original 3 batches of the Control samples. That’s why the ncols are identical for those subsets.

The mock group assignment of each subject also varies across replicates.

## Number of rows and columns of every SCE object, one table row per
## replicate, labelled with the cohort size and replicate name
map_dfr(
    sce_objects,
    function(replicates) {
        map_dfr(
            replicates,
            function(sce) c(nrows = nrow(sce), ncols = ncol(sce)),
            .id = "replicate"
        )
    },
    .id = "n_patients"
)

Subjects are divided across mock groups as follows:

n_patients = 10

replicate_1

A B
IGTB1793_IGTB1793 1 0
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 1 0
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 0 1
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

replicate_2

A B
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 1 0
IGTB1901_IGTB1901 1 0
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 0 1
IGTB2065_IGTB2065 1 0

replicate_3

A B
IGTB1762_IGTB1762 0 1
IGTB1793_IGTB1793 1 0
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1871_IGTB1871 1 0
IGTB1895_IGTB1895 0 1
IGTB1952_IGTB1952 1 0
IGTB1966_IGTB1966 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 0 1

replicate_4

A B
IGTB1762_IGTB1762 0 1
IGTB1793_IGTB1793 0 1
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 1 0
IGTB1895_IGTB1895 1 0
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_5

A B
IGTB1789_IGTB1789 0 1
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 1 0
IGTB1895_IGTB1895 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 0 1
IGTB2065_IGTB2065 0 1

n_patients = 20

replicate_1

A B
IGTB143_IGTB143 1 0
IGTB469_IGTB469 0 1
IGTB498_IGTB498 0 1
IGTB508_IGTB508 1 0
IGTB514_IGTB514 0 1
IGTB645_IGTB645 0 1
IGTB670_IGTB670 1 0
IGTB826_IGTB826 1 0
IGTB1539_IGTB1539 1 0
IGTB1762_IGTB1762 1 0
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 0 1
IGTB1901_IGTB1901 0 1
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

replicate_2

A B
IGTB141_IGTB141 1 0
IGTB195_IGTB195 1 0
IGTB469_IGTB469 1 0
IGTB514_IGTB514 0 1
IGTB645_IGTB645 1 0
IGTB670_IGTB670 0 1
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1575_IGTB1575 0 1
IGTB1789_IGTB1789 0 1
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 0 1
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 1 0
IGTB2065_IGTB2065 1 0

replicate_3

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 0 1
IGTB195_IGTB195 1 0
IGTB469_IGTB469 0 1
IGTB498_IGTB498 1 0
IGTB514_IGTB514 1 0
IGTB826_IGTB826 0 1
IGTB1372_IGTB1372 1 0
IGTB1575_IGTB1575 0 1
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 1 0
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 1 0
IGTB1901_IGTB1901 0 1
IGTB1906_IGTB1906 0 1
IGTB1952_IGTB1952 1 0
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0

replicate_4

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 0 1
IGTB508_IGTB508 1 0
IGTB670_IGTB670 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 0 1
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 1 0
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 1 0
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 0 1
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_5

A B
IGTB141_IGTB141 0 1
IGTB143_IGTB143 0 1
IGTB508_IGTB508 1 0
IGTB514_IGTB514 1 0
IGTB645_IGTB645 1 0
IGTB826_IGTB826 1 0
IGTB1372_IGTB1372 0 1
IGTB1575_IGTB1575 1 0
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 1 0
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 1 0
IGTB1901_IGTB1901 0 1
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 0 1
IGTB2065_IGTB2065 0 1

n_patients = 30

replicate_1

A B
IGTB141_IGTB141 0 1
IGTB143_IGTB143 1 0
IGTB195_IGTB195 1 0
IGTB469_IGTB469 0 1
IGTB498_IGTB498 0 1
IGTB508_IGTB508 1 0
IGTB514_IGTB514 0 1
IGTB645_IGTB645 0 1
IGTB670_IGTB670 1 0
IGTB826_IGTB826 1 0
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 1 0
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 1 0
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 0 1
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 1 0

replicate_2

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 1 0
IGTB195_IGTB195 1 0
IGTB469_IGTB469 1 0
IGTB498_IGTB498 1 0
IGTB508_IGTB508 1 0
IGTB514_IGTB514 0 1
IGTB645_IGTB645 1 0
IGTB670_IGTB670 0 1
IGTB826_IGTB826 0 1
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 0 1
IGTB1575_IGTB1575 0 1
IGTB1762_IGTB1762 1 0
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 0 1
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 1 0
IGTB1906_IGTB1906 0 1
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 1 0
IGTB1996_IGTB1996 0 1
IGTB2007_IGTB2007 0 1
IGTB2065_IGTB2065 1 0

replicate_3

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 0 1
IGTB195_IGTB195 1 0
IGTB469_IGTB469 0 1
IGTB498_IGTB498 1 0
IGTB508_IGTB508 1 0
IGTB514_IGTB514 1 0
IGTB645_IGTB645 1 0
IGTB670_IGTB670 0 1
IGTB826_IGTB826 0 1
IGTB1372_IGTB1372 1 0
IGTB1506_IGTB1506 1 0
IGTB1539_IGTB1539 0 1
IGTB1575_IGTB1575 0 1
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 1 0
IGTB1793_IGTB1793 1 0
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 1 0
IGTB1895_IGTB1895 0 1
IGTB1901_IGTB1901 0 1
IGTB1906_IGTB1906 0 1
IGTB1952_IGTB1952 1 0
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 0 1
IGTB2065_IGTB2065 1 0

replicate_4

A B
IGTB141_IGTB141 1 0
IGTB143_IGTB143 0 1
IGTB195_IGTB195 0 1
IGTB469_IGTB469 1 0
IGTB498_IGTB498 0 1
IGTB508_IGTB508 1 0
IGTB514_IGTB514 1 0
IGTB645_IGTB645 1 0
IGTB670_IGTB670 0 1
IGTB826_IGTB826 0 1
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 0 1
IGTB1539_IGTB1539 0 1
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 0 1
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 0 1
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 1 0
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 0 1
IGTB1906_IGTB1906 1 0
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 1 0
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 1 0
IGTB2065_IGTB2065 0 1

replicate_5

A B
IGTB141_IGTB141 0 1
IGTB143_IGTB143 0 1
IGTB195_IGTB195 1 0
IGTB469_IGTB469 0 1
IGTB498_IGTB498 1 0
IGTB508_IGTB508 1 0
IGTB514_IGTB514 1 0
IGTB645_IGTB645 1 0
IGTB670_IGTB670 0 1
IGTB826_IGTB826 1 0
IGTB1372_IGTB1372 0 1
IGTB1506_IGTB1506 1 0
IGTB1539_IGTB1539 0 1
IGTB1575_IGTB1575 1 0
IGTB1762_IGTB1762 0 1
IGTB1789_IGTB1789 0 1
IGTB1793_IGTB1793 1 0
IGTB1815_IGTB1815 1 0
IGTB1828_IGTB1828 1 0
IGTB1840_IGTB1840 1 0
IGTB1871_IGTB1871 1 0
IGTB1895_IGTB1895 1 0
IGTB1901_IGTB1901 0 1
IGTB1906_IGTB1906 0 1
IGTB1952_IGTB1952 0 1
IGTB1966_IGTB1966 0 1
IGTB1982_IGTB1982 0 1
IGTB1996_IGTB1996 1 0
IGTB2007_IGTB2007 0 1
IGTB2065_IGTB2065 0 1

The number of DE and non-DE genes per replicate:

## Tabulate the `is_DE` flag stored in the rowData of every replicate; one
## table per cohort size, one row per replicate
map(sce_objects, function(replicates) {
    map_dfr(
        replicates,
        function(sce) table(rowData(sce)$is_DE),
        .id = "replicate"
    )
})
#> $n_patients_10
#> # A tibble: 5 × 3
#>   replicate   `FALSE` `TRUE` 
#>   <chr>       <table> <table>
#> 1 replicate_1 3792    201    
#> 2 replicate_2 3799    201    
#> 3 replicate_3 3801    201    
#> 4 replicate_4 3786    201    
#> 5 replicate_5 3794    201    
#> 
#> $n_patients_20
#> # A tibble: 5 × 3
#>   replicate   `FALSE` `TRUE` 
#>   <chr>       <table> <table>
#> 1 replicate_1 3659    199    
#> 2 replicate_2 3609    199    
#> 3 replicate_3 3639    199    
#> 4 replicate_4 3759    201    
#> 5 replicate_5 3696    201    
#> 
#> $n_patients_30
#> # A tibble: 5 × 3
#>   replicate   `FALSE` `TRUE` 
#>   <chr>       <table> <table>
#> 1 replicate_1 3637    198    
#> 2 replicate_2 3642    200    
#> 3 replicate_3 3638    200    
#> 4 replicate_4 3634    201    
#> 5 replicate_5 3634    200

t-SNE plots

## One combined t-SNE figure per cohort size: a panel for every replicate,
## coloured by `mock_group` and titled with the replicate name
tSNE_plots <- map(sce_objects, function(replicates) {
    panels <- imap(
        replicates,
        ~ scater::plotTSNE(.x, colour_by = "mock_group") + ggtitle(.y)
    )
    patchwork::wrap_plots(panels, ncol = 3, guides = "collect")
})

n_patients = 10

n_patients = 20

n_patients = 30

Extract results of interest

Runtimes

## Get runtimes for each method within each cohort size and stack them into a
## single table with `n_patients` and `method` as identifier columns
runtimes <- map_dfr(
    res_list,
    function(method_res) {
        map_dfr(method_res, get_runtimes, depth = 1, .id = "method")
    },
    .id = "n_patients"
)
## The list names look like "n_patients_10"; keep only the numeric part
runtimes <- mutate(
    runtimes,
    n_patients = as.numeric(sub("n_patients_", "", n_patients))
)

P-values

## Aggregate the replicate tables of every method (second nesting level), then
## combine the methods into one table per cohort size, labelled by `method`
res_tables <- res_list %>%
    map_depth(2, get_aggregated_rep_tables, depth = 1) %>%
    map(function(method_tables) combine_tables(method_tables, .id = "method"))

Visualize results

Run times

## Runtime against cohort size: jittered points plus a linear trend per method
ggplot(
    data = runtimes,
    mapping = aes(x = n_patients, y = time, colour = method)
) +
    geom_jitter(width = 0.5, alpha = 0.6) +
    geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
    ## Only the cohort sizes that were actually run are meaningful x positions
    scale_x_continuous(
        breaks = unique(runtimes$n_patients),
        minor_breaks = NULL
    ) +
    labs(x = "Number of patients", y = "Time (seconds)", colour = NULL)

P-value distributions for non-DE genes

## For every cohort size, keep only the result rows of the genes that are not
## flagged as DE (`is_DE`) in the rowData of the matching replicate
non_de_res <- map2(sce_objects, res_tables, function(sce_list, res) {
    by_rep <- split(res, res$replicate)
    ## split() returns its groups in sorted order, which need not be the order
    ## of `sce_list` (e.g. "replicate_10" sorts before "replicate_2"). Pair
    ## the tables with the SCE objects by name whenever the names agree;
    ## otherwise fall back to the positional pairing.
    sce_names <- names(sce_list)
    if (!is.null(sce_names) && !anyDuplicated(sce_names) &&
        setequal(sce_names, names(by_rep))) {
        by_rep <- by_rep[sce_names]
    }
    out <- map2(sce_list, by_rep, function(sce, tbl) {
        ## Select only non-DE genes
        non_de <- rownames(sce)[!rowData(sce)$is_DE]
        tbl[tbl$gene %in% non_de, ]
    })
    bind_rows(out, .id = "replicate")
})
## One pval_hist() figure per cohort size
non_de_pval_figs <- map(non_de_res, ~ pval_hist(.x))

n_patients_10

n_patients_20

n_patients_30

Performance evaluation with iCOBRA

Prepare Data

P-values for missing genes are set to 1.

## Build the iCOBRA input for every replicate of every cohort size
cobra_data <- map2(res_tables, sce_objects, function(res_table, sce_list) {
    ## Split up results per replicate
    res_per_replicate <- split(res_table, res_table$replicate)
    ## split() returns its groups in sorted order, which need not be the order
    ## of `sce_list` (e.g. "replicate_10" sorts before "replicate_2"). Pair
    ## the tables with the SCE objects by name whenever the names agree;
    ## otherwise fall back to the positional pairing.
    sce_names <- names(sce_list)
    if (!is.null(sce_names) && !anyDuplicated(sce_names) &&
        setequal(sce_names, names(res_per_replicate))) {
        res_per_replicate <- res_per_replicate[sce_names]
    }
    map2(res_per_replicate, sce_list, prepare_COBRAData, replace_missing = TRUE)
})

## Compute the performance measures against the binary truth column "status"
cobra_perf <- map_depth(cobra_data, 2, calculate_performance, binary_truth = "status")

## Convert the performance objects into their plotting representation
cobra_objects <- map_depth(cobra_perf, 2, prepare_data_for_plot)

FDR-TPR curves

## One combined figure per cohort size, with an FDR-TPR curve panel for every
## replicate (titled with the replicate name)
fdr_tpr_plots <- map(cobra_objects, function(replicates) {
    panels <- imap(replicates, function(cobra, rep_name) {
        plot_fdrtprcurve(cobra, title = rep_name)
    })
    patchwork::wrap_plots(panels, ncol = 3, guides = "collect")
})

n_patients_10

n_patients_20

n_patients_30

FDR-TPR plots averaged across replicates

## Working points: stack the FDR/TPR tables of all cohort sizes and turn the
## "n_patients_<n>" labels into the numeric cohort size
fdr_tpr_points <- bind_rows(
    map(cobra_objects, combine_fdrtpr_tables),
    .id = "n_patients"
)
fdr_tpr_points <- mutate(
    fdr_tpr_points,
    n_patients = as.numeric(sub("n_patients_", "", n_patients))
)

## Mean FDR and TPR within each threshold / method / cohort size combination;
## the grouping is kept on the result (`.groups = "keep"`)
fdr_tpr_points_averaged <- summarize(
    group_by(fdr_tpr_points, thr, method, n_patients),
    across(c(FDR, TPR), mean),
    .groups = "keep"
)
## One facet per cohort size; points outside the axis limits are dropped
## (see the warnings below)
plot_fdrtpr_points(fdr_tpr_points_averaged) +
    facet_wrap(vars(n_patients), labeller = "label_both") +
    xlim(c(0, 0.45)) +
    ylim(c(0, 1))
#> Warning: Removed 3 row(s) containing missing values (geom_path).
#> Warning: Removed 3 rows containing missing values (geom_point).

FDR control at threshold 0.05

## Threshold at which FDR control is inspected
use_thr <- 0.05
## NOTE(review): this is an exact `==` match on `thr`; it presumably relies on
## `thr` holding exactly this value -- confirm against combine_fdrtpr_tables()
fdr_data <- fdr_tpr_points %>%
    filter(thr == use_thr)

## One facet per cohort size, y axis fixed to [0, 1]
plot_fdr_control(fdr_data, use_thr = use_thr) +
    facet_wrap(vars(n_patients), labeller = "label_both") +
    ylim(c(0, 1))

Session info

Session info
#> [1] "2023-04-11 09:53:53 CEST"
#> Local:    main /Users/jg/Desktop/PhD/DD_project/DD_git
#> Remote:   main @ origin (git@github.com:jgilis/DD_benchmark.git)
#> Head:     [a917bf9] 2023-04-08: generate publication figures
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.1.3 (2022-03-10)
#>  os       macOS Big Sur/Monterey 10.16
#>  system   x86_64, darwin17.0
#>  ui       X11
#>  language (EN)
#>  collate  C
#>  ctype    UTF-8
#>  tz       Europe/Brussels
#>  date     2023-04-11
#>  pandoc   2.17.1.1 @ /Users/jg/opt/anaconda3/bin/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  !  package              * version    date (UTC) lib source
#>  P  argparse             * 2.1.2      2021-10-21 [?] CRAN (R 4.1.0)
#>     assertthat             0.2.1      2019-03-21 [2] CRAN (R 4.1.0)
#>  P  backports              1.3.0      2021-10-27 [?] CRAN (R 4.1.0)
#>     beachmat               2.10.0     2021-10-26 [2] Bioconductor
#>     beeswarm               0.4.0      2021-06-01 [2] CRAN (R 4.1.0)
#>     Biobase              * 2.54.0     2021-10-26 [2] Bioconductor
#>     BiocGenerics         * 0.40.0     2021-10-26 [2] Bioconductor
#>  P  BiocManager            1.30.16    2021-06-15 [?] CRAN (R 4.1.0)
#>     BiocNeighbors          1.12.0     2021-10-26 [2] Bioconductor
#>  P  BiocParallel           1.28.1     2021-11-18 [?] Bioconductor
#>     BiocSingular           1.10.0     2021-10-26 [2] Bioconductor
#>  P  bitops                 1.0-7      2021-04-24 [?] CRAN (R 4.1.0)
#>     bluster                1.4.0      2021-10-26 [2] Bioconductor
#>  P  broom                  0.7.10     2021-10-31 [?] CRAN (R 4.1.0)
#>  P  bslib                  0.3.1      2021-10-06 [?] CRAN (R 4.1.0)
#>     cellranger             1.1.0      2016-07-27 [2] CRAN (R 4.1.0)
#>  P  cli                    3.6.1      2023-03-23 [?] CRAN (R 4.1.2)
#>  P  cluster                2.1.2      2021-04-17 [?] CRAN (R 4.1.0)
#>  P  colorspace             2.0-2      2021-06-24 [?] CRAN (R 4.1.0)
#>     cowplot                1.1.1      2020-12-30 [2] CRAN (R 4.1.0)
#>  P  crayon                 1.4.2      2021-10-29 [?] CRAN (R 4.1.0)
#>  P  DBI                    1.1.1      2021-01-15 [?] CRAN (R 4.1.0)
#>     dbplyr                 2.1.1      2021-04-06 [2] CRAN (R 4.1.0)
#>     DDCompanion          * 0.1.2      2023-04-07 [1] local
#>     DelayedArray           0.20.0     2021-10-26 [2] Bioconductor
#>     DelayedMatrixStats     1.16.0     2021-10-26 [2] Bioconductor
#>  P  digest                 0.6.28     2021-09-23 [?] CRAN (R 4.1.0)
#>  P  dplyr                * 1.0.7      2021-06-18 [?] CRAN (R 4.1.0)
#>     dqrng                  0.3.0      2021-05-01 [2] CRAN (R 4.1.0)
#>  P  DT                     0.20       2021-11-15 [?] CRAN (R 4.1.0)
#>     edgeR                  3.36.0     2021-10-26 [2] Bioconductor
#>     ellipsis               0.3.2      2021-04-29 [2] CRAN (R 4.1.0)
#>  P  evaluate               0.14       2019-05-28 [?] CRAN (R 4.1.0)
#>  P  fansi                  1.0.2      2022-01-14 [?] CRAN (R 4.1.2)
#>  P  farver                 2.1.0      2021-02-28 [?] CRAN (R 4.1.0)
#>     fastmap                1.1.0      2021-01-25 [2] CRAN (R 4.1.0)
#>  P  findpython             1.0.7      2021-01-27 [?] CRAN (R 4.1.0)
#>     forcats              * 0.5.1      2021-01-27 [2] CRAN (R 4.1.0)
#>  P  fs                     1.6.1      2023-02-06 [?] CRAN (R 4.1.2)
#>  P  generics               0.1.1      2021-10-25 [?] CRAN (R 4.1.0)
#>  P  GenomeInfoDb         * 1.30.0     2021-10-26 [?] Bioconductor
#>     GenomeInfoDbData       1.2.7      2021-12-18 [2] Bioconductor
#>     GenomicRanges        * 1.46.1     2021-11-18 [2] Bioconductor
#>     ggbeeswarm             0.6.0      2017-08-07 [2] CRAN (R 4.1.0)
#>  P  ggplot2              * 3.3.5.9000 2023-03-30 [?] Github (tidyverse/ggplot2@199eb90)
#>     ggrepel                0.9.1      2021-01-15 [2] CRAN (R 4.1.0)
#>  P  git2r                  0.29.0     2021-11-22 [?] CRAN (R 4.1.0)
#>  P  glue                   1.6.0      2021-12-17 [?] CRAN (R 4.1.0)
#>     gridExtra              2.3        2017-09-09 [2] CRAN (R 4.1.0)
#>  P  gtable                 0.3.0      2019-03-25 [?] CRAN (R 4.1.0)
#>  P  haven                  2.4.3      2021-08-04 [?] CRAN (R 4.1.0)
#>     here                 * 1.0.1      2020-12-13 [2] CRAN (R 4.1.0)
#>     highr                  0.9        2021-04-16 [2] CRAN (R 4.1.0)
#>     hms                    1.1.1      2021-09-26 [2] CRAN (R 4.1.0)
#>  P  htmltools              0.5.2      2021-08-25 [?] CRAN (R 4.1.0)
#>     htmlwidgets            1.5.4      2021-09-08 [2] CRAN (R 4.1.0)
#>  P  httpuv                 1.6.3      2021-09-09 [?] CRAN (R 4.1.0)
#>  P  httr                   1.4.2      2020-07-20 [?] CRAN (R 4.1.0)
#>  P  iCOBRA               * 1.22.1     2021-11-03 [?] Bioconductor
#>  P  igraph                 1.2.8      2021-11-07 [?] CRAN (R 4.1.0)
#>     IRanges              * 2.28.0     2021-10-26 [2] Bioconductor
#>  P  irlba                  2.3.3      2019-02-05 [?] CRAN (R 4.1.0)
#>     jquerylib              0.1.4      2021-04-26 [2] CRAN (R 4.1.0)
#>  P  jsonlite               1.7.2      2020-12-09 [?] CRAN (R 4.1.0)
#>  P  kableExtra           * 1.3.4      2021-02-20 [?] CRAN (R 4.1.2)
#>  P  knitr                  1.36       2021-09-29 [?] CRAN (R 4.1.0)
#>     labeling               0.4.2      2020-10-20 [2] CRAN (R 4.1.0)
#>     later                  1.3.0      2021-08-18 [2] CRAN (R 4.1.0)
#>     lattice                0.20-45    2021-09-22 [2] CRAN (R 4.1.3)
#>  P  lifecycle              1.0.1      2021-09-24 [?] CRAN (R 4.1.0)
#>  P  limma                  3.50.0     2021-10-26 [?] Bioconductor
#>  P  locfit                 1.5-9.4    2020-03-25 [?] CRAN (R 4.1.0)
#>     lubridate              1.8.0      2021-10-07 [2] CRAN (R 4.1.0)
#>  P  magrittr               2.0.1      2020-11-17 [?] CRAN (R 4.1.0)
#>  P  Matrix                 1.3-4      2021-06-01 [?] CRAN (R 4.1.0)
#>     MatrixGenerics       * 1.6.0      2021-10-26 [2] Bioconductor
#>  P  matrixStats          * 0.61.0     2021-09-17 [?] CRAN (R 4.1.0)
#>     metapod                1.2.0      2021-10-26 [2] Bioconductor
#>  P  mgcv                   1.8-38     2021-10-06 [?] CRAN (R 4.1.0)
#>     mime                   0.12       2021-09-28 [2] CRAN (R 4.1.0)
#>     modelr                 0.1.8      2020-05-19 [2] CRAN (R 4.1.0)
#>     munsell                0.5.0      2018-06-12 [2] CRAN (R 4.1.0)
#>  P  nlme                   3.1-153    2021-09-07 [?] CRAN (R 4.1.0)
#>  P  patchwork              1.1.1      2020-12-17 [?] CRAN (R 4.1.0)
#>  P  pillar                 1.6.4      2021-10-18 [?] CRAN (R 4.1.0)
#>     pkgconfig              2.0.3      2019-09-22 [2] CRAN (R 4.1.0)
#>  P  plyr                   1.8.6      2020-03-03 [?] CRAN (R 4.1.0)
#>     promises               1.2.0.1    2021-02-11 [2] CRAN (R 4.1.0)
#>  P  purrr                * 0.3.4      2020-04-17 [?] CRAN (R 4.1.0)
#>     R6                     2.5.1      2021-08-19 [2] CRAN (R 4.1.0)
#>  P  Rcpp                   1.0.7      2021-07-07 [?] CRAN (R 4.1.0)
#>  P  RCurl                  1.98-1.5   2021-09-17 [?] CRAN (R 4.1.0)
#>  P  readr                * 2.1.0      2021-11-11 [?] CRAN (R 4.1.0)
#>  P  readxl                 1.3.1      2019-03-13 [?] CRAN (R 4.1.0)
#>     renv                   0.15.5     2022-05-26 [1] CRAN (R 4.1.2)
#>     reprex                 2.0.1      2021-08-05 [2] CRAN (R 4.1.0)
#>     reshape2               1.4.4      2020-04-09 [2] CRAN (R 4.1.0)
#>  P  rlang                  1.1.0      2023-03-14 [?] CRAN (R 4.1.2)
#>  P  rmarkdown              2.16       2022-08-24 [?] CRAN (R 4.1.3)
#>  P  ROCR                   1.0-11     2020-05-02 [?] CRAN (R 4.1.0)
#>  VP rprojroot              2.0.3      2020-11-15 [2] CRAN (R 4.1.0) (on disk 2.0.2)
#>     rstudioapi             0.13       2020-11-12 [2] CRAN (R 4.1.0)
#>     rsvd                   1.0.5      2021-04-16 [2] CRAN (R 4.1.0)
#>     rvest                  1.0.2      2021-10-16 [2] CRAN (R 4.1.0)
#>  P  S4Vectors            * 0.32.2     2021-11-07 [?] Bioconductor
#>  P  sass                   0.4.0      2021-05-12 [?] CRAN (R 4.1.0)
#>     ScaledMatrix           1.2.0      2021-10-26 [2] Bioconductor
#>  P  scales                 1.1.1      2020-05-11 [?] CRAN (R 4.1.0)
#>     scater                 1.22.0     2021-10-26 [2] Bioconductor
#>     scran                  1.22.1     2021-11-14 [2] Bioconductor
#>     scuttle              * 1.4.0      2021-10-26 [2] Bioconductor
#>  P  sessioninfo            1.2.2      2021-12-06 [?] CRAN (R 4.1.0)
#>  P  shiny                  1.7.1      2021-10-02 [?] CRAN (R 4.1.0)
#>  P  shinyBS                0.61       2015-03-31 [?] CRAN (R 4.1.0)
#>     shinydashboard         0.7.2      2021-09-30 [2] CRAN (R 4.1.0)
#>     SingleCellExperiment * 1.16.0     2021-10-26 [2] Bioconductor
#>     sparseMatrixStats      1.6.0      2021-10-26 [2] Bioconductor
#>  P  statmod                1.4.36     2021-05-10 [?] CRAN (R 4.1.0)
#>  P  stringi                1.7.5      2021-10-04 [?] CRAN (R 4.1.0)
#>  P  stringr              * 1.4.0      2019-02-10 [?] CRAN (R 4.1.0)
#>     SummarizedExperiment * 1.24.0     2021-10-26 [2] Bioconductor
#>  P  svglite                2.0.0      2021-02-20 [?] CRAN (R 4.1.0)
#>  P  systemfonts            1.0.3      2021-10-13 [?] CRAN (R 4.1.2)
#>  P  tibble               * 3.1.6      2021-11-07 [?] CRAN (R 4.1.0)
#>  P  tidyr                * 1.1.4      2021-09-27 [?] CRAN (R 4.1.0)
#>  P  tidyselect             1.1.1      2021-04-30 [?] CRAN (R 4.1.0)
#>     tidyverse            * 1.3.1      2021-04-15 [2] CRAN (R 4.1.0)
#>  P  tzdb                   0.2.0      2021-10-27 [?] CRAN (R 4.1.0)
#>     UpSetR                 1.4.0      2019-05-22 [2] CRAN (R 4.1.0)
#>  P  utf8                   1.2.2      2021-07-24 [?] CRAN (R 4.1.0)
#>  P  vctrs                  0.3.8      2021-04-29 [?] CRAN (R 4.1.0)
#>     vipor                  0.4.5      2017-03-22 [2] CRAN (R 4.1.0)
#>     viridis                0.6.2      2021-10-13 [2] CRAN (R 4.1.0)
#>  P  viridisLite            0.4.0      2021-04-13 [?] CRAN (R 4.1.0)
#>  P  webshot                0.5.2      2019-11-22 [?] CRAN (R 4.1.0)
#>  P  withr                  2.4.2      2021-04-18 [?] CRAN (R 4.1.0)
#>  P  xfun                   0.33       2022-09-12 [?] CRAN (R 4.1.3)
#>  P  xml2                   1.3.2      2020-04-23 [?] CRAN (R 4.1.0)
#>     xtable                 1.8-4      2019-04-21 [2] CRAN (R 4.1.0)
#>     XVector                0.34.0     2021-10-26 [2] Bioconductor
#>  P  yaml                   2.2.1      2020-02-01 [?] CRAN (R 4.1.0)
#>     zlibbioc               1.40.0     2021-10-26 [2] Bioconductor
#> 
#>  [1] /Users/jg/Desktop/PhD/DD_project/DD_git/renv/library/R-4.1/x86_64-apple-darwin17.0
#>  [2] /Library/Frameworks/R.framework/Versions/4.1/Resources/library
#> 
#>  V ── Loaded and on-disk version mismatch.
#>  P ── Loaded and on-disk path mismatch.
#> 
#> ──────────────────────────────────────────────────────────────────────────────